#install.packages("lubridate")
#install.packages("ggplot2")
#install.packages("forecast")
#install.packages("here")
#install.packages("knitr")
#install.packages("kableExtra")
#install.packages("dplyr")
#install.packages("openxlsx")
#install.packages("ggthemes")
#install.packages("tidyr")
#install.packages("GGally")
#install.packages("tseries")
#install.packages("blorr")
#install.packages("car")
#install.packages("corrplot")
#install.packages("dlm")
#install.packages("randomForest")
#install.packages("Kendall")
library(lubridate)
library(ggplot2)
library(forecast)
library(here)
library(knitr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.3.2
library(dplyr)
library(openxlsx)
library(ggthemes)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.3.2
library(Kendall)
library(GGally)
library(trend)
library(tseries)
library(blorr)
library(lmtest)
library(car)
library(cowplot)
## Warning: package 'cowplot' was built under R version 4.3.2
library(corrplot)
library(dlm)
library(randomForest)
library(e1071) #for SVM
#date
# Parse the date columns from character to Date (lubridate::ymd expects
# "yyyy-mm-dd"-style strings; unparseable entries become NA with a warning).
df_load$date<-ymd(df_load$date)
df_temp$date<-ymd(df_temp$date)
df_humid$date<-ymd(df_humid$date)
#selected needed column
df_load_processed<-df_load%>%
select(meter_id,date,daily_mean)
# Keep columns 2-28 of the weather tables (27 columns in the slice).
# NOTE(review): assumes column 1 of each slice is the date and columns
# 2-27 are daily readings — TODO confirm against the raw files; the loop
# below stops at 27, so verify column 27 (the last one) is covered.
df_temp_processed<-df_temp[,2:28]
df_humid_processed<-df_humid[,2:28]
#as numeric
# Coerce the reading columns to numeric (they may have been read as text).
for (i in 2:27){
df_temp_processed[,i]<-as.numeric(df_temp_processed[,i])
df_humid_processed[,i]<-as.numeric(df_humid_processed[,i])
}
#load
# Build the time-series objects for the daily load series.
# BUG FIX: the original used start=c(2005-01-01), which R evaluates as
# the arithmetic expression 2005 - 1 - 1 = 2003 — not a date. A ts
# start must be c(year, period) (or a single year).
ts_load<-ts(df_load_processed[,3], start=c(2005,1), frequency=365)
# Multi-seasonal series: weekly (7) and yearly (365) periods.
msts_load<-msts(df_load_processed[,3],seasonal.periods=c(7,365))
autoplot(msts_load)
#load
# Base-graphics ACF/PACF side by side (first 40 lags).
par(mfrow=c(1,2))
Acf(msts_load,lag.max=40,main=paste("ACF for load"),ylim=c(-0.5,1))
Pacf(msts_load,lag.max=40,main=paste("PACF for load"),ylim=c(-0.5,1))
# Same diagnostics as ggplot panels via cowplot::plot_grid.
plot_grid(
autoplot(Acf(ts_load,plot=FALSE,lag.max=40)),
autoplot(Pacf(ts_load,plot=FALSE,lag.max=40)),
nrow=1
)
#decompose
# Classical decomposition of the multi-seasonal series into
# trend / seasonal / remainder components.
load_decompose<-decompose(msts_load)
plot(load_decompose)
#deseason
# Seasonally adjusted series (observed minus seasonal component),
# with its ACF/PACF for comparison against the raw series above.
load_deseason<-seasadj(load_decompose)
plot_grid(
autoplot(load_deseason),
autoplot(Acf(load_deseason,plot=FALSE,lag.max=40)),
autoplot(Pacf(load_deseason,plot=FALSE,lag.max=40)),
nrow=1
)
#Mann-Kendall
# Mann-Kendall test for a monotonic trend; the small p-value (0.0049)
# below indicates a statistically significant (if weak, tau ~ 0.04) trend.
summary(MannKendall(msts_load)) #trend in the series
## Score = 106325 , Var(Score) = 1428222848
## denominator = 2741282
## tau = 0.0388, 2-sided pvalue =0.0049019
#ADF test
# Augmented Dickey-Fuller test: p-value < 0.01 rejects the unit-root
# null, so the series is (trend-)stationary.
print(adf.test(msts_load,alternative = "stationary")) #stationary trend
## Warning in adf.test(msts_load, alternative = "stationary"): p-value smaller
## than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: msts_load
## Dickey-Fuller = -5.3158, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
#find out how many times is needed for differencing
# nsdiffs() estimates the number of seasonal differences required; 0
# confirms no seasonal differencing is needed.
ns_diff<-nsdiffs(ts_load)
cat("Number of seasonal differencing needed:", ns_diff) #no need to difference the series
## Number of seasonal differencing needed: 0
#Use the one-year testing dataset to find the best model for one-month forecast.
#training: from 2005-01-01 to 2010-05-30
#testing: from 2010-05-31 to 2011-05-31
#origin series
load_data <- df_load_processed[,3]
#splitting training and testing
# BUG FIX: nrow() returns NULL when [,3] drops to a plain vector
# (base data.frame indexing); NROW() works for both vectors and
# one-column data frames.
nobs <- NROW(load_data)
nfor <- 365  # hold out the final 365 observations (one year)
# Split both the raw and the deseasonalized multi-seasonal series at
# the same point so train/test windows line up across models.
msts_training<-subset(msts_load, end=length(msts_load)-nfor)
msts_testing<-subset(msts_load, start=length(msts_load)-nfor+1)
msts_deseason_training<-subset(load_deseason, end=length(msts_load)-nfor)
msts_deseason_testing<-subset(load_deseason, start=length(msts_load)-nfor+1)
##ETS
# STL decomposition + ETS on the seasonally adjusted series, forecasting
# one year ahead.
ETS_fit<-stlf(msts_training,h=365)
##ARIMA
#K=c(2,4)
#fit ARIMA with original time series
# Non-seasonal ARIMA with Fourier terms as external regressors to
# capture the weekly/yearly seasonality (K = harmonics per period).
ARIMA_Four_fit_1<-auto.arima(msts_training,
seasonal=FALSE,
xreg=fourier(msts_training,K=c(2,4)))
#forecast with ARIMA_fit
# fourier(..., h=365) generates the *future* Fourier terms matching the
# training harmonics.
ARIMA_Four_forecast_1<-forecast(ARIMA_Four_fit_1,
xreg=fourier(msts_training,K=c(2,4),h=365),
h=365)
#fit ARIMA with deseason time series
# (Kept for reference: earlier experiment fitting on the deseasonalized
# series, with the seasonal component added back after forecasting.)
#ARIMA_Four_fit_1<-auto.arima(msts_deseason_training,
# seasonal=FALSE,
# xreg=fourier(msts_deseason_training,K=c(2,6)))
#forecast with ARIMA_fit
#ARIMA_Four_forecast_1<-forecast(ARIMA_Four_fit_1,
# xreg=fourier(msts_deseason_training,K=c(2,6),h=31),
# h=31)
# Add seasonality back
#season_start <- length(load_decompose$seasonal)-364
#season_end <- length(load_decompose$seasonal)-364 + 30
#ARIMA_forecast <- ARIMA_Four_forecast_1$mean + load_decompose$seasonal[season_start:season_end]
#K=c(2,12)
#fit ARIMA with original time series
# Same model family but with more yearly harmonics (12 instead of 4).
ARIMA_Four_fit_2<-auto.arima(msts_training,
seasonal=FALSE,
xreg=fourier(msts_training,K=c(2,12)))
##forecast with ARIMA_fit
ARIMA_Four_forecast_2<-forecast(ARIMA_Four_fit_2,
xreg=fourier(msts_training,K=c(2,12),h=365),
h=365)
#temp
#create time series object
# NOTE(review): start=c(2005,01,01) passes three elements, but ts()
# only uses the first two (year, period) — the trailing 1 is silently
# ignored. Consider start=c(2005,1) for clarity.
msts_temp<-msts(df_temp_processed[,2], seasonal.periods=c(7,365),start=c(2005,01,01))
# Split with the same one-year holdout (nfor) as the load series so the
# exogenous regressor aligns with the training/testing windows.
msts_temp_training<-subset(msts_temp, end=length(msts_temp)-nfor)
msts_temp_testing<-subset(msts_temp, start=length(msts_temp)-nfor+1)
#humid
#create time series object
msts_humid<-msts(df_humid_processed[,2], seasonal.periods=c(7,365),start=c(2005,01,01))
msts_humid_training<-subset(msts_humid, end=length(msts_humid)-nfor)
msts_humid_testing<-subset(msts_humid, start=length(msts_humid)-nfor+1)
#
#K=c(2,4)
#fit ARIMA with original time series
# FIX: name the temperature regressor ("temp") in BOTH the training and
# the forecast xreg matrices. The original left the training column
# auto-named after the series object and the forecast column after
# temp_forecast_1$mean, so forecast.Arima warned:
#   "xreg contains different column names from the xreg used in training"
ARIMA_Four_fit_XREG_1<-auto.arima(msts_training,
seasonal=FALSE,
#combine fourier and exogenous variable time series
xreg=as.matrix(cbind(fourier(msts_training,K=c(2,4)),
temp=msts_temp_training)))
#generate fourier terms for the forecast horizon
fourier_terms_1<-fourier(msts_training, K=c(2,4), h = 365)
#forecast the exogenous temperature series over the same horizon
temp_forecast_1<-forecast(msts_temp_training, h = 365)
#combine fourier terms and forecasts into a numeric matrix,
#with column names matching the training xreg
xreg_matrix_1<-as.matrix(cbind(fourier_terms_1, temp=temp_forecast_1$mean))
#forecast with ARIMA_fit_XREG
ARIMA_Four_forecast_XREG_1<-forecast(ARIMA_Four_fit_XREG_1,
xreg = xreg_matrix_1,
h = 365)
#K=c(2,12)
#fit ARIMA with original time series
# Same model with 12 yearly harmonics; the same column-naming fix applies.
ARIMA_Four_fit_XREG_2<-auto.arima(msts_training,
seasonal=FALSE,
#combine fourier and exogenous variable time series
xreg=as.matrix(cbind(fourier(msts_training,K=c(2,12)),
temp=msts_temp_training)))
#generate fourier terms
fourier_terms_2<-fourier(msts_training, K=c(2,12), h = 365)
#generate forecast
temp_forecast_2<-forecast(msts_temp_training, h = 365)
#combine fourier terms and forecasts into a numeric matrix
xreg_matrix_2<-as.matrix(cbind(fourier_terms_2, temp=temp_forecast_2$mean))
#forecast with ARIMA_fit_XREG
ARIMA_Four_forecast_XREG_2<-forecast(ARIMA_Four_fit_XREG_2,
xreg = xreg_matrix_2,
h = 365)
##NN
#p=1, P=0 because seasonal=FALSE
# Neural-network autoregression (nnetar) with one non-seasonal lag and
# Fourier terms as regressors to carry the seasonality.
#K=c(2,4)
NN_fit_1<-nnetar(msts_training,
p=1,
P=0,
xreg=fourier(msts_training,K=c(2,4)))
# Future Fourier terms (h=365) must match the training harmonics.
NN_forecast_1<-forecast(NN_fit_1,
xreg=fourier(msts_training,K=c(2,4),h=365),
h=365)
#Add seasonality back
#NN_forecast <- NN_forecast_1$mean + load_decompose$seasonal[season_start:season_end]
#K=c(2,6)
NN_fit_2<-nnetar(msts_training,
p=1,
P=0,
xreg=fourier(msts_training,K=c(2,6)))
NN_forecast_2<-forecast(NN_fit_2,
xreg=fourier(msts_training,K=c(2,6),h=365),
h=365)
#K=c(2,12)
# FIX: use distinct names. The original reused NN_fit_2/NN_forecast_2
# here, silently overwriting the K=c(2,6) model that later plotting and
# accuracy code labels as "K=c(2,6)".
NN_fit_3<-nnetar(msts_training,
p=1,
P=0,
xreg=fourier(msts_training,K=c(2,12)))
NN_forecast_3<-forecast(NN_fit_3,
xreg=fourier(msts_training,K=c(2,12),h=365),
h=365)
#One-month forecast horizon ## NN Optimization
# Grid search over nnetar orders (p, P), scored by the sum of training
# and test MAPE.
# Training and test sets
n_train <- round(0.8*length(load_data))
n_test <- length(load_data)-n_train
# NOTE(review): indexing a ts with [ ] drops the ts attributes, so
# `train` is a plain numeric vector with frequency 1 — the seasonal
# order P likely has no effect here; confirm if seasonal NN orders are
# actually wanted.
train <- ts_load[1:n_train]
test <- ts_load[(n_train+1):length(load_data)]
nn_acc_min <- 40
p_opt <- 0
P_opt <- 0
# BUG FIXES vs the original:
#  * seq(0:7) / seq(0:3) evaluate to 1:8 / 1:4 (seq over a vector), not
#    0:7 / 0:3 — the effective ranges 1:8 and 1:4 are kept explicitly.
#  * the inner call hard-coded P=1, so the P loop was dead; pass P=P.
for (p in 1:8){
print(p)
for (P in 1:4){
nn_train <- nnetar(train,p=p,P=P)
nn_testfor <- forecast(nn_train,h=n_test)
nn_acc <- accuracy(nn_testfor,test)
# accuracy() returns a 2x8 matrix; linear indices 9 and 10 are the
# training-set and test-set MAPE entries (column 5).
if ((nn_acc[9] + nn_acc[10]) < nn_acc_min){
nn_acc_min <- nn_acc[9] + nn_acc[10]
p_opt <- p
P_opt <- P
}
}
}
# FIX: print(a, b, c) treats b and c as print options and shows only a;
# cat() reports all three values.
cat("min MAPE sum:", nn_acc_min, "p_opt:", p_opt, "P_opt:", P_opt, "\n")
#Ina: this chunk does not work for me
#forecast for one month
# Fit optimized model
#nn_train_opt <- nnetar(train,p=p_opt,P=P_opt)
#nn_testfor_opt <- forecast(nn_train_opt,h=n_test)
#nn_acc_opt <- accuracy(nn_testfor_opt,petrol_test)
#cat("Optimized Neural Network: p =",p_opt, ", P =",P_opt,", Training Accuracy (MAPE):", nn_acc_opt[9],", Test Accuracy (MAPE):", nn_acc_opt[10])
# Train on whole data set and forecast for 31 days
#nn_fit <- nnetar(msts_training,p=p_opt,P=P_opt)
#nn_for <- forecast(nn_fit, h=31)
#forecast for one month
# Fit TBATS model (handles the multiple seasonal periods of msts_training
# directly) and forecast 31 days ahead.
tbats_fit <- tbats(msts_training)
tbats_for <- forecast(tbats_fit, h=31)
#Plot results (ggplot2 is already loaded at the top of the script)
autoplot(ts(load_data), series="Time Series History") +
#autolayer(nn_for$fitted, series = "NN Fit") +
#autolayer(ts(tbats_for$fitted), series = "TBATS Fit") +
#autolayer(nn_for$mean, series = "NN Forecast") +
autolayer(ts(tbats_for$mean), series = "TBATS Forecast") +
xlab("Year") + ylab("Load") +
# FIX: the title said "Neural Network" but only the TBATS forecast is
# plotted (the NN layers are commented out).
ggtitle("TBATS Load Forecasting")
#forecast period
# Extract the forecast window and pull the matching slice of observed data.
forecast_start<-start(ARIMA_Four_forecast_1$mean)
forecast_end<-end(ARIMA_Four_forecast_1$mean)
msts_forecast<-window(msts_load, start=forecast_start, end=forecast_end)
#plot forecasting result
#add the seasonality back when using deseason series
#ETS_fit$mean<-ETS_fit$mean+load_decompose$seasonal
#ARIMA_Four_forecast_1$mean<-ARIMA_Four_forecast_1$mean+load_decompose$seasonal
#NN_forecast_1$mean<-NN_forecast_1$mean+load_decompose$seasonal
#ARIMA_Four_forecast_XREG_1$mean<-ARIMA_Four_forecast_XREG_1$mean+load_decompose$seasonal
#with K=c(2,4)
# FIXES: use the observed series (msts_forecast) as the base layer — the
# original plotted the NN mean as the base and then layered it again —
# and drop PI= from the $mean layers: those are plain ts objects whose
# autolayer method does not accept PI (it triggered the
# "Ignoring unknown parameters: `PI`" warnings).
autoplot(msts_forecast)+
autolayer(ETS_fit,series="STL+ETS",PI=FALSE)+
autolayer(ARIMA_Four_forecast_1$mean, series="ARIMA Fourier")+
autolayer(ARIMA_Four_forecast_XREG_1$mean, series="ARIMA Fourier XREG")+
autolayer(NN_forecast_1$mean, series="Neural Network")
#with K=c(2,12) / K=c(2,6)
autoplot(msts_forecast)+
autolayer(ETS_fit,series="STL+ETS",PI=FALSE)+
autolayer(ARIMA_Four_forecast_2, series="ARIMA Fourier",PI=FALSE)+
autolayer(ARIMA_Four_forecast_XREG_2, series="ARIMA Fourier XREG",PI=FALSE)+
autolayer(NN_forecast_2, series="Neural Network") #K=c(2,6)
# Residual diagnostics (Ljung-Box test + residual plots) for each model.
# All fits below leave significant residual autocorrelation (small
# p-values), though the XREG models come closest to white noise.
resi_ETS<-checkresiduals(ETS_fit)
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(A,N,N)
## Q* = 1375.7, df = 395, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 395
resi_ARIMA_Four_1<-checkresiduals(ARIMA_Four_forecast_1)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,1) errors
## Q* = 636.85, df = 393, p-value = 7.871e-14
##
## Model df: 2. Total lags used: 395
resi_ARIMA_Four_2<-checkresiduals(ARIMA_Four_forecast_2)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(1,1,1) errors
## Q* = 640.54, df = 393, p-value = 3.819e-14
##
## Model df: 2. Total lags used: 395
resi_ARIMA_Four_XREG_1<-checkresiduals(ARIMA_Four_forecast_XREG_1)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,1,1) errors
## Q* = 443.85, df = 392, p-value = 0.03597
##
## Model df: 3. Total lags used: 395
resi_ARIMA_Four_XREG_2<-checkresiduals(ARIMA_Four_forecast_XREG_2)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,1,1) errors
## Q* = 443.64, df = 392, p-value = 0.03653
##
## Model df: 3. Total lags used: 395
resi_NN_forecast_1<-checkresiduals(NN_forecast_1)
##
## Ljung-Box test
##
## data: Residuals from NNAR(1,7)
## Q* = 644.4, df = 395, p-value = 2.942e-14
##
## Model df: 0. Total lags used: 395
resi_NN_forecast_2<-checkresiduals(NN_forecast_2)
##
## Ljung-Box test
##
## data: Residuals from NNAR(1,15)
## Q* = 983.84, df = 395, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 395
# Out-of-sample accuracy of each model against the one-year test window.
# NOTE(review): verify NN_forecast_2 really holds the K=c(2,6) fit — an
# earlier chunk refits with K=c(2,12) under the same variable name.
#Model1:ETS
scores_ETS<-accuracy(ETS_fit$mean,msts_testing)
#Model2: ARIMA + Fourier, K=c(2,4)
scores_ARIMA_Four_forecast_1<-accuracy(ARIMA_Four_forecast_1$mean,msts_testing)
#Model3: ARIMA + Fourier, K=c(2,12)
scores_ARIMA_Four_forecast_2<-accuracy(ARIMA_Four_forecast_2$mean,msts_testing)
#Model4: Neural network, K=c(2,4)
scores_NN_1<-accuracy(NN_forecast_1$mean,msts_testing)
#Model5: Neural network, K=c(2,6)
scores_NN_2<-accuracy(NN_forecast_2$mean,msts_testing)
#Model6: ARIMA + Fourier + XREG, K=c(2,4)
scores_ARIMA_Four_XREG_1<-accuracy(ARIMA_Four_forecast_XREG_1$mean,msts_testing)
#Model7: ARIMA + Fourier + XREG, K=c(2,12)
scores_ARIMA_Four_XREG_2<-accuracy(ARIMA_Four_forecast_XREG_2$mean,msts_testing)
#create a data frame to store the scores
# Row order here must match the row.names assignment below.
scores<-as.data.frame(rbind(scores_ETS,
scores_ARIMA_Four_forecast_1,
scores_ARIMA_Four_forecast_2,
scores_ARIMA_Four_XREG_1,
scores_ARIMA_Four_XREG_2,
scores_NN_1,
scores_NN_2))
row.names(scores)<-c("ETS",
"ARIMA+Fourier, K=C(2,4)",
"ARIMA+Fourier, K=C(2,12)",
"ARIMA+Fourier+XREG, K=C(2,4)",
"ARIMA+Fourier+XREG, K=C(2,12)",
"NN, K=C(2,4)",
"NN, K=C(2,6)")
#create a comparable table
kbl(scores,
caption = "Forecast Accuracy for Electricity Demand",
digits = array(5,ncol(scores))) %>%
kable_styling(full_width = FALSE, position = "center", latex_options = "hold_position")
| ME | RMSE | MAE | MPE | MAPE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|
| ETS | 84.34916 | 759.3052 | 580.6240 | -1.42490 | 14.89561 | 0.74624 | 1.43490 |
| ARIMA+Fourier, K=C(2,4) | 99.27076 | 725.1554 | 540.9276 | -1.05062 | 13.81881 | 0.77667 | 1.36068 |
| ARIMA+Fourier, K=C(2,12) | 190.83584 | 735.3196 | 536.6225 | 1.59532 | 13.19891 | 0.77211 | 1.32451 |
| ARIMA+Fourier+XREG, K=C(2,4) | 2215.52568 | 2609.1714 | 2236.9484 | 60.81112 | 61.51566 | 0.92701 | 6.00400 |
| ARIMA+Fourier+XREG, K=C(2,12) | 2195.91287 | 2601.4314 | 2220.2837 | 60.35767 | 61.17022 | 0.92918 | 6.01038 |
| NN, K=C(2,4) | 255.27192 | 723.1842 | 529.0527 | 3.70096 | 12.85880 | 0.73693 | 1.29377 |
| NN, K=C(2,6) | 239.44901 | 796.4350 | 585.9143 | 3.15991 | 14.48496 | 0.74806 | 1.46118 |
#Model3 has the lowest the RMSE and MAPE
# July-2011 forecast.
# BUG FIX: the original computed mean(date >= ... & date <= ...), i.e.
# the mean of the logical mask itself — the *fraction of rows* falling
# in July 2011 — not a mean temperature/humidity. Subset first, then
# average the values.
# NOTE(review): assumes df_temp_processed/df_humid_processed carry a
# `date` column and daily values in column 2 — TODO confirm against the
# preprocessing step.
july_mask_t <- df_temp_processed$date >= as.Date("2011-07-01") &
  df_temp_processed$date <= as.Date("2011-07-31")
temp_july <- mean(as.numeric(df_temp_processed[july_mask_t, 2]), na.rm = TRUE)
july_mask_h <- df_humid_processed$date >= as.Date("2011-07-01") &
  df_humid_processed$date <= as.Date("2011-07-31")
humid_july <- mean(as.numeric(df_humid_processed[july_mask_h, 2]), na.rm = TRUE)
# Generate Fourier terms for July 2011
fourier_terms_july <- fourier(msts_training, K = c(2, 12), h = 31)
# BUG FIX: forecast from the fitted model (ARIMA_Four_fit_2), not the
# forecast object, and supply only the regressors it was trained with —
# ARIMA_Four_fit_2's xreg is the Fourier terms alone; appending
# temp/humid columns would not match the training design matrix.
forecast_july <- forecast(ARIMA_Four_fit_2,
                          xreg = fourier_terms_july,
                          h = 31)
# Plot the forecast for July 2011
autoplot(forecast_july) +
  labs(title = "Forecast for Load Data - July 2011")
#Final forecast
#Train over whole dataset for final forecasting
msts_training_all<-msts(load_data,seasonal.periods=c(7,365))
# BUG FIX: fit and forecast on the FULL series (msts_training_all);
# the original built it but then kept using the training subset
# (msts_training), so the holdout year never informed the final model.
NN_fit_final<-nnetar(msts_training_all,
p=1,
P=0,
xreg=fourier(msts_training_all,K=c(2,6)))
NN_forecast_final<-forecast(NN_fit_final,
xreg=fourier(msts_training_all,K=c(2,6),h=31),
h=31)
autoplot(NN_forecast_final)